{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.display import display, HTML\n",
    "display(HTML(\"<style>.container { width:100% !important; }</style>\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "NIIhBrDA_lL6"
   },
   "source": [
    "# Between Human Dignity and Security: Identifying Citizen and Elite Preferences and Concerns over Refugee Reception\n",
    "\n",
    "---\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "6AOWtu3d_0-p"
   },
   "source": [
    "## **Loading data & modules**\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Sl6zLlPWU760",
    "outputId": "2b745d49-6db8-4ab1-a0f3-f85b2e01e5b1"
   },
   "outputs": [],
   "source": [
    "# Importing libraries and modules\n",
    "import os\n",
    "import pandas as pd\n",
    "import plotly.graph_objects as go\n",
    "\n",
    "from src.text_normalizations import (\n",
    "    abbreviation_creator,\n",
    "    bigram_topic_matrix_creator,\n",
    "    normalize_text_for_topic_analysis,\n",
    "    text_normalizer,\n",
    "    topic_dictionary,\n",
    "    topic_cleaner,\n",
    "    unify_citizens_councilors_texts,\n",
    "    unigram_topic_matrix_creator,\n",
    ")\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read config json\n",
    "config_path = \"config.json\"\n",
    "with open(config_path, \"r\") as config:\n",
    "    config_dict = json.load(config)\n",
    "\n",
    "# Assign paths to variables\n",
    "citizens_full_path = config_dict[\"citizens full excel path\"]\n",
    "councilors_full_path = config_dict[\"councilors full excel path\"]\n",
    "text_analysis_path = config_dict[\"text analysis data excel path\"]\n",
    "councilors_spelled_path = config_dict[\"councilors spelled excel path\"]\n",
    "stopwords_path = config_dict[\"stopwords excel path\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "PRLmog6S0kEL"
   },
   "outputs": [],
   "source": [
    "# Loading datasets and stopwords\n",
    "citizens_full = pd.read_excel(citizens_full_path)\n",
    "councilors_full = pd.read_excel(councilors_full_path)\n",
    "text_analysis = pd.read_excel(text_analysis_path)\n",
    "text_analysis.dropna(inplace=True)\n",
    "text_analysis = pd.merge(\n",
    "    left=text_analysis,\n",
    "    right=citizens_full,\n",
    "    left_on=\"Anonymous_id\",\n",
    "    right_on=\"Anonymous_id\",\n",
    ")\n",
    "text_analysis.dropna(subset=[\"Q30_campfeedback\"], inplace=True)\n",
    "councilors_spelled = pd.read_excel(councilors_spelled_path)\n",
    "councilors_spelled.dropna(subset=[\"Q30\"], inplace=True)\n",
    "councilors_spelled = pd.merge(\n",
    "    left=councilors_spelled,\n",
    "    right=councilors_full,\n",
    "    left_on=\"id_anonymous\",\n",
    "    right_on=\"id_anonymous\",\n",
    ")\n",
    "councilors_spelled.dropna(subset=[\"Q30\"], inplace=True)\n",
    "stop_words_df = pd.read_excel(stopwords_path, sheet_name=\"stopwords\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "TVjWJqWTSxDX"
   },
   "outputs": [],
   "source": [
    "# Apply text normalization\n",
    "text_analysis[\"cleaned\"] = text_analysis.Q30_campfeedback.apply(\n",
    "    lambda x: text_normalizer(x)\n",
    ")\n",
    "councilors_spelled[\"cleaned\"] = councilors_spelled.Q30.apply(\n",
    "    lambda x: text_normalizer(x)\n",
    ")\n",
    "\n",
    "# Extract abbreviations\n",
    "text_analysis[\"cleaned\"] = text_analysis.cleaned.apply(\n",
    "    lambda x: abbreviation_creator(x)\n",
    ")\n",
    "councilors_spelled[\"cleaned\"] = councilors_spelled.cleaned.apply(\n",
    "    lambda x: abbreviation_creator(x)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "wKjhtgdRaigL"
   },
   "outputs": [],
   "source": [
    "stem_dict_1, stem_dict_2 = unify_citizens_councilors_texts(\n",
    "    citizens_df=text_analysis, councilors_df=councilors_spelled\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "vel_JolvnDFe"
   },
   "source": [
    "## Topic Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "text_analysis, councilors_spelled = normalize_text_for_topic_analysis(\n",
    "    text_analysis=text_analysis,\n",
    "    councilors_spelled=councilors_spelled,\n",
    "    stem_dict_1=stem_dict_1,\n",
    "    stem_dict_2=stem_dict_2,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "JtIvP6_6nDFv"
   },
   "outputs": [],
   "source": [
    "# Drop nan values from both citizens and councilors\n",
    "citizens = text_analysis.dropna(subset=[\"cleaned\"])\n",
    "councilors = councilors_spelled.dropna(subset=[\"cleaned\"])\n",
    "\n",
    "old_cols = [\n",
    "    \"Identity Characteristics\",\n",
    "    \"Legal rationale\",\n",
    "    \"Cultural/ Social  concerns\",\n",
    "    \"Public order concerns\",\n",
    "    \"Economic concerns\",\n",
    "    \"Humanitarian concerns\",\n",
    "    \"Mobility concerns\",\n",
    "    \"Trust in authorities\",\n",
    "    \"Fairness\",\n",
    "]\n",
    "\n",
    "new_cols = [\n",
    "    \"Identity\",\n",
    "    \"Legal\",\n",
    "    \"Cultural/Social\",\n",
    "    \"Public Order\",\n",
    "    \"Economic\",\n",
    "    \"Humanitarian\",\n",
    "    \"Mobility\",\n",
    "    \"Trust Authorities\",\n",
    "    \"Fairness\",\n",
    "]\n",
    "\n",
    "citizens = citizens.rename(\n",
    "    columns={\n",
    "        col: new_col\n",
    "        for col, new_col in zip(\n",
    "            old_cols,\n",
    "            new_cols,\n",
    "        )\n",
    "    }\n",
    ")\n",
    "\n",
    "councilors = councilors.rename(\n",
    "    columns={col: new_col for col, new_col in zip(old_cols, new_cols)}\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "AuePFUb5nDFw"
   },
   "outputs": [],
   "source": [
    "# Read topics' unigrams and bigrams\n",
    "topics_path = config_dict[\"topic excel path\"]\n",
    "\n",
    "unigrams = pd.read_excel(topics_path, sheet_name=\"unigrams\")\n",
    "bigrams = pd.read_excel(topics_path, sheet_name=\"bigrams\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "5lskV5BonDFw"
   },
   "source": [
    "### Figure 7: Topic analysis on citizens and councilors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "eHp10N_ZnDFx"
   },
   "outputs": [],
   "source": [
    "# Apply topic cleaner on each dataframe\n",
    "citizens.cleaned = citizens.cleaned.apply(lambda x: topic_cleaner(x))\n",
    "councilors.cleaned = councilors.cleaned.apply(lambda x: topic_cleaner(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "77DKaJIOnDFx"
   },
   "outputs": [],
   "source": [
    "# Drop column from both dataframes\n",
    "unigrams.drop(1, inplace=True)\n",
    "bigrams.drop(1, inplace=True)\n",
    "\n",
    "# Extract unigrams and bigrams\n",
    "unigrams = unigrams.iloc[:,1:]\n",
    "bigrams = bigrams.iloc[:,1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "l6i4UuglnDFy",
    "outputId": "1ec94097-4f93-47fd-b70a-3e4c19667a72"
   },
   "outputs": [],
   "source": [
    "# Fill nan values with the string 'nothing'\n",
    "unigrams.fillna(\"nothing\", inplace=True)\n",
    "bigrams.fillna(\"nothing\", inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "1MJIQKZInDFy"
   },
   "outputs": [],
   "source": [
    "# Create unigrams and bigrams dictionaries\n",
    "unigram_dict = topic_dictionary(unigrams) \n",
    "bigram_dict = topic_dictionary(bigrams)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "OEv8tHK3nDFz"
   },
   "outputs": [],
   "source": [
    "# Create unigram and bigram matrices for both citizens and councilors\n",
    "citizens_unigrams_matrix = unigram_topic_matrix_creator(\n",
    "    dataframe=citizens, topic_dic=unigram_dict\n",
    ")\n",
    "citizens_bigrams_matrix = bigram_topic_matrix_creator(\n",
    "    dataframe=citizens, topic_dic=bigram_dict\n",
    ")\n",
    "\n",
    "councilors_unigrams_matrix = unigram_topic_matrix_creator(\n",
    "    dataframe=councilors, topic_dic=unigram_dict\n",
    ")\n",
    "councilors_bigrams_matrix = bigram_topic_matrix_creator(\n",
    "    dataframe=councilors, topic_dic=bigram_dict\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "T3kheaO9nDF0"
   },
   "outputs": [],
   "source": [
    "# Add unigrams and bigrams matrices to the associated matrices\n",
    "citizens_matrix, councilors_matrix = (\n",
    "    citizens_unigrams_matrix + citizens_bigrams_matrix,\n",
    "    councilors_unigrams_matrix + councilors_bigrams_matrix,\n",
    ")\n",
    "\n",
    "# Iterate through citizens matrix\n",
    "for row in range(citizens_matrix.shape[0]):\n",
    "    for column in range(citizens_matrix.shape[1]):\n",
    "        # If element is greater than 0, assign to it the value 1\n",
    "        if citizens_matrix[row, column] > 0:\n",
    "            citizens_matrix[row, column] = 1\n",
    "\n",
    "# Iterate through citizens matrix\n",
    "for row in range(councilors_matrix.shape[0]):\n",
    "    for column in range(councilors_matrix.shape[1]):\n",
    "        # If element is greater than 0, assign to it the value 1\n",
    "        if councilors_matrix[row, column] > 0:\n",
    "            councilors_matrix[row, column] = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "vnnSVRVqnDF0"
   },
   "outputs": [],
   "source": [
    "# Concatenate matrices to then original dataframes\n",
    "citizens_df = pd.concat([citizens, pd.DataFrame(citizens_matrix,\n",
    "             columns=new_cols)], axis=1)\n",
    "\n",
    "# Concatenate matrices to then original dataframes\n",
    "councilors_df = pd.concat([councilors, pd.DataFrame(councilors_matrix,\n",
    "             columns=new_cols)], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "bjjtYzuMnDF1"
   },
   "outputs": [],
   "source": [
    "# If political orientation is greater than 4, assign \"Δεξιοί\" else \"Αριστεροί\"\n",
    "citizens_df.pol_orient = citizens_df.pol_orient.apply(\n",
    "    lambda x: \"Δεξιοί\" if x > 4 else \"Αριστεροί\"\n",
    ")\n",
    "councilors_df.pol_orient_x = councilors_df.pol_orient_x.apply(\n",
    "    lambda x: \"Δεξιοί\" if x > 4 else \"Αριστεροί\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ZTnQW0WrnDF4"
   },
   "outputs": [],
   "source": [
    "# Group each dataframe by political orientation column\n",
    "citizens_pol_orient_df = citizens_df.groupby(\"pol_orient\")[new_cols].sum()\n",
    "\n",
    "councilors_pol_orient_df = councilors_df.groupby(\"pol_orient_x\")[new_cols].sum()\n",
    "\n",
    "# Apply feature engineering by creating a column \"Total\" representing the sum of topics values\n",
    "citizens_pol_orient_df.loc[\"Total\"] = citizens_pol_orient_df.sum()\n",
    "councilors_pol_orient_df.loc[\"Total\"] = councilors_pol_orient_df.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 331
    },
    "id": "o4TIEtaDnDF5",
    "outputId": "78522d28-14a2-4d08-cab7-8fed51c29bf7"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "# Create dataframe where the columns are topic names and rows are the associated topic occurrences\n",
    "citizens_topics = pd.DataFrame(\n",
    "    dict(citizens_df[new_cols].sum()), index=[\"Total\"]\n",
    ").T.reset_index()\n",
    "\n",
    "# Divide \"Total\" column with the sum of the topics in order to get topic frequencies\n",
    "citizens_topics.Total = citizens_topics.Total.apply(\n",
    "    lambda x: x / (citizens_df[citizens_df.iloc[:, -9:].sum(axis=1) != 0].shape[0])\n",
    ")\n",
    "\n",
    "# Calculate statistics for each column\n",
    "summary = citizens_df[new_cols].describe().T\n",
    "summary[\"mean\"] = summary[\"mean\"]\n",
    "summary[\"std\"] = summary[\"std\"]\n",
    "summary[\"n\"] = len(citizens_df)\n",
    "\n",
    "# Compute confidence intervals\n",
    "confidence_level = 0.95\n",
    "z_score = 1.96  # Z-score for 95% CI\n",
    "summary[\"margin_of_error\"] = z_score * (summary[\"std\"] / np.sqrt(summary[\"n\"]))\n",
    "summary[\"lower_ci\"] = summary[\"mean\"] - summary[\"margin_of_error\"]\n",
    "summary[\"upper_ci\"] = summary[\"mean\"] + summary[\"margin_of_error\"]\n",
    "\n",
    "# Create a DataFrame for plotting\n",
    "plot_data = pd.DataFrame(\n",
    "    {\n",
    "        \"Column\": summary.index,\n",
    "        \"Mean\": summary[\"mean\"],\n",
    "        \"Lower CI\": summary[\"lower_ci\"],\n",
    "        \"Upper CI\": summary[\"upper_ci\"],\n",
    "        \"Margin of Error\": summary[\"margin_of_error\"],\n",
    "    }\n",
    ")\n",
    "\n",
    "citizens_topics[\"Mean\"] = plot_data[\"Mean\"].values.tolist()\n",
    "citizens_topics[\"Lower CI\"] = plot_data[\"Lower CI\"].values.tolist()\n",
    "citizens_topics[\"Upper CI\"] = plot_data[\"Upper CI\"].values.tolist()\n",
    "citizens_topics[\"Margin of Error\"] = plot_data[\"Margin of Error\"].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 331
    },
    "id": "FYxvke_MnDF5",
    "outputId": "3f176872-ab88-43d4-fab4-1c6e2ed358c9"
   },
   "outputs": [],
   "source": [
    "# Create dataframe where the columns are topic names and rows are the associated topic occurrences\n",
    "councilors_topics = pd.DataFrame(\n",
    "    dict(councilors_df[new_cols].sum()), index=[\"Total\"]\n",
    ").T.reset_index()\n",
    "\n",
    "# Divide \"Total\" column with the sum of the topics in order to get topic frequencies\n",
    "councilors_topics.Total = councilors_topics.Total.apply(\n",
    "    lambda x: x / (councilors_df[councilors_df.iloc[:, -9:].sum(axis=1) != 0].shape[0])\n",
    ")\n",
    "\n",
    "# Calculate statistics for each column\n",
    "summary = councilors_df[new_cols].describe().T\n",
    "summary[\"mean\"] = summary[\"mean\"]\n",
    "summary[\"std\"] = summary[\"std\"]\n",
    "summary[\"n\"] = len(councilors_df)\n",
    "\n",
    "# Compute confidence intervals\n",
    "confidence_level = 0.95\n",
    "z_score = 1.96  # Z-score for 95% CI\n",
    "summary[\"margin_of_error\"] = z_score * (summary[\"std\"] / np.sqrt(summary[\"n\"]))\n",
    "summary[\"lower_ci\"] = summary[\"mean\"] - summary[\"margin_of_error\"]\n",
    "summary[\"upper_ci\"] = summary[\"mean\"] + summary[\"margin_of_error\"]\n",
    "\n",
    "# Create a DataFrame for plotting\n",
    "plot_data = pd.DataFrame(\n",
    "    {\n",
    "        \"Column\": summary.index,\n",
    "        \"Mean\": summary[\"mean\"],\n",
    "        \"Lower CI\": summary[\"lower_ci\"],\n",
    "        \"Upper CI\": summary[\"upper_ci\"],\n",
    "    }\n",
    ")\n",
    "\n",
    "councilors_topics[\"Mean\"] = plot_data[\"Mean\"].values.tolist()\n",
    "councilors_topics[\"Lower CI\"] = plot_data[\"Lower CI\"].values.tolist()\n",
    "councilors_topics[\"Upper CI\"] = plot_data[\"Upper CI\"].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 542
    },
    "id": "YpAd5YXxnDF6",
    "outputId": "ae367283-3951-4ead-8561-3e5fed975ee9"
   },
   "outputs": [],
   "source": [
    "# Extract concerns and append them to a list for citizens and councilors\n",
    "citizens_concerns = citizens_topics[\"index\"].values.tolist()\n",
    "councilors_concernss = councilors_topics[\"index\"].values.tolist()\n",
    "\n",
    "# Create figure\n",
    "fig = go.Figure()\n",
    "\n",
    "# Add labels\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"Citizens\",\n",
    "        x=citizens_concerns,\n",
    "        y=citizens_topics.Total,\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"crimson\",\n",
    "    )\n",
    ")\n",
    "\n",
    "# Add labels\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"Councilors\",\n",
    "        x=councilors_concernss,\n",
    "        y=councilors_topics.Total,\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"pink\",\n",
    "        marker_pattern_shape=\"\",\n",
    "    )\n",
    ")\n",
    "\n",
    "\n",
    "# Change the bar mode\n",
    "fig.update_layout(\n",
    "    title_x=0.5, barmode=\"group\", template=\"plotly_white\", font=dict(size=22)\n",
    ")\n",
    "\n",
    "fig.update_layout(\n",
    "    annotations=[\n",
    "        dict(\n",
    "            text=\"Frequency of mentions\",\n",
    "            x=-0.05,  # Adjust the position as needed\n",
    "            y=1.05,  # Position above the plot (1.05 places it above the plot area)\n",
    "            xref=\"paper\",\n",
    "            yref=\"paper\",\n",
    "            showarrow=False,\n",
    "            font=dict(size=22, color=\"black\"),  # Font size and color\n",
    "            xanchor=\"left\",  # Anchor the text to the left\n",
    "            yanchor=\"bottom\",  # Anchor the text at the bottom\n",
    "        )\n",
    "    ]\n",
    ")\n",
    "\n",
    "fig.update_xaxes(tickfont=dict(size=22))\n",
    "\n",
    "fig.write_html(\n",
    "    file=\"../figures/Figure_7.html\",\n",
    ")\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "E1OugHFmnDF6"
   },
   "outputs": [],
   "source": [
    "# Copy citizes_pol_orient_df\n",
    "citizens_topics = citizens_pol_orient_df.copy()\n",
    "\n",
    "# Get topic frequencies\n",
    "citizens_topics.iloc[0, :] = (\n",
    "    citizens_topics.iloc[0, :]\n",
    "    / citizens_df[\n",
    "        (citizens_df.pol_orient == \"Αριστεροί\")\n",
    "        & (citizens_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")\n",
    "citizens_topics.iloc[1, :] = (\n",
    "    citizens_topics.iloc[1, :]\n",
    "    / citizens_df[\n",
    "        (citizens_df.pol_orient == \"Δεξιοί\")\n",
    "        & (citizens_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")\n",
    "\n",
    "# Copy councilors_pol_orient_df\n",
    "councilors_topics = councilors_pol_orient_df.copy()\n",
    "\n",
    "# Get topic frequencies\n",
    "councilors_topics.iloc[0, :] = (\n",
    "    councilors_topics.iloc[0, :]\n",
    "    / councilors_df[\n",
    "        (councilors_df.pol_orient_x == \"Αριστεροί\")\n",
    "        & (councilors_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")\n",
    "councilors_topics.iloc[1, :] = (\n",
    "    councilors_topics.iloc[1, :]\n",
    "    / councilors_df[\n",
    "        (councilors_df.pol_orient_x == \"Δεξιοί\")\n",
    "        & (councilors_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Figure 8: Topic analysis on citizens and councilors by their political orientation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 542
    },
    "id": "sNa_Pt-qnDF7",
    "outputId": "864d51e7-dd78-4e35-9ef6-fcbe4c6e8204"
   },
   "outputs": [],
   "source": [
    "# Extract concerns and append them to a list for citizens and councilors\n",
    "citizens_concerns = citizens_topics.columns.tolist()\n",
    "councilors_concerns = councilors_topics.columns.tolist()\n",
    "\n",
    "# Create figure\n",
    "fig = go.Figure()\n",
    "\n",
    "# Add labels\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"Left-wing citizens\",\n",
    "        x=citizens_concerns,\n",
    "        y=citizens_topics.reset_index().iloc[0, 1:],\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"crimson\",\n",
    "    )\n",
    ")\n",
    "\n",
    "# Add labels\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"Left-wing councilors\",\n",
    "        x=councilors_concerns,\n",
    "        y=councilors_topics.reset_index().iloc[0, 1:],\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"pink\",\n",
    "        marker_pattern_shape=\"\",\n",
    "    )\n",
    ")\n",
    "\n",
    "# Add labels\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"Right-wing citizens\",\n",
    "        x=citizens_concerns,\n",
    "        y=citizens_topics.reset_index().iloc[1, 1:],\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"royalblue\",\n",
    "    )\n",
    ")\n",
    "\n",
    "# Add labels\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"Right-wing councilors\",\n",
    "        x=councilors_concerns,\n",
    "        y=councilors_topics.reset_index().iloc[1, 1:],\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"lightblue\",\n",
    "        marker_pattern_shape=\"\",\n",
    "    )\n",
    ")\n",
    "\n",
    "\n",
    "# Change the bar mode\n",
    "fig.update_layout(\n",
    "    title_x=0.5, barmode=\"group\", template=\"plotly_white\", font=dict(size=22)\n",
    ")\n",
    "\n",
    "fig.update_layout(\n",
    "    annotations=[\n",
    "        dict(\n",
    "            text=\"Frequency of mentions\",\n",
    "            x=-0.05,  # Adjust the position as needed\n",
    "            y=1.05,  # Position above the plot (1.05 places it above the plot area)\n",
    "            xref=\"paper\",\n",
    "            yref=\"paper\",\n",
    "            showarrow=False,\n",
    "            font=dict(size=22, color=\"black\"),  # Font size and color\n",
    "            xanchor=\"left\",  # Anchor the text to the left\n",
    "            yanchor=\"bottom\",  # Anchor the text at the bottom\n",
    "        )\n",
    "    ]\n",
    ")\n",
    "\n",
    "fig.update_xaxes(tickfont=dict(size=22))\n",
    "\n",
    "fig.write_html(\n",
    "    file=\"../figures/Figure_8.html\",\n",
    ")\n",
    "\n",
    "# Show figure\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read updated dataset containing columns 'exposed' and 'intensity'\n",
    "imm_pop_path = config_dict[\"immigrants populations excel path\"]\n",
    "imm_pop = pd.read_excel(imm_pop_path)\n",
    "\n",
    "# Map exposed, intensity and political orientation values\n",
    "exposed = {0: \"Non exposed\", 1: \"Exposed\"}\n",
    "intensity = {1: \"Not exposed\", 2: \"Low exposure\", 3: \"High exposure\"}\n",
    "\n",
    "imm_pop.exposed = imm_pop.exposed.apply(lambda x: exposed[x])\n",
    "imm_pop.intensity = imm_pop.intensity.apply(lambda x: intensity[x])\n",
    "imm_pop.pol_orient = imm_pop.pol_orient.apply(\n",
    "    lambda x: \"Right-wing\" if x > 4 else \"Left-wing\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Concatenate matrices to then original dataframes\n",
    "imm_pop_df = pd.concat(\n",
    "    [imm_pop, pd.DataFrame(citizens_matrix, columns=new_cols)], axis=1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group each dataframe by exposure column\n",
    "imm_pop_exposure = imm_pop_df.groupby(\"exposed\")[new_cols].sum()\n",
    "\n",
    "# Apply feature engineering by creating a column \"Total\" representing the sum of topics values\n",
    "imm_pop_exposure.loc[\"Total\"] = imm_pop_exposure.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get citizens topics\n",
    "citizens_topics = imm_pop_exposure.copy()\n",
    "\n",
    "# Get topic frequencies\n",
    "citizens_topics.iloc[0, :] = (\n",
    "    citizens_topics.iloc[0, :]\n",
    "    / imm_pop_df[\n",
    "        (imm_pop_df.exposed == \"Exposed\") & (imm_pop_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")\n",
    "citizens_topics.iloc[1, :] = (\n",
    "    citizens_topics.iloc[1, :]\n",
    "    / imm_pop_df[\n",
    "        (imm_pop_df.exposed == \"Non exposed\")\n",
    "        & (imm_pop_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read dataset\n",
    "imm_pop = pd.read_excel(imm_pop_path)\n",
    "\n",
    "# Map exposure, intensity and political orientation\n",
    "imm_pop.pol_orient = imm_pop.pol_orient.apply(\n",
    "    lambda x: \"Right-wing\" if x > 4 else \"Left-wing\"\n",
    ")\n",
    "imm_pop.exposed = imm_pop.exposed.apply(lambda x: {0: \"No camp\", 1: \"Camp\"}[x])\n",
    "imm_pop.loc[\n",
    "    imm_pop.Q26_residence_2.isin(\n",
    "        [\"Mytilinis\", \"Anatolikis Samou\", \"Lerou\", \"Chiou\", \"Ko\", \"Orestiadas\"]\n",
    "    ),\n",
    "    \"exposed\",\n",
    "] = \"RIC\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Concatenate matrices to then original dataframes\n",
    "imm_pop_df = pd.concat(\n",
    "    [imm_pop, pd.DataFrame(citizens_matrix, columns=new_cols)], axis=1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group each dataframe by exposure column\n",
    "imm_pop_exposure = imm_pop_df.groupby(\"exposed\")[new_cols].sum()\n",
    "\n",
    "# Apply feature engineering by creating a column \"Total\" representing the sum of topics values\n",
    "imm_pop_exposure.loc[\"Total\"] = imm_pop_exposure.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get citizens topics\n",
    "citizens_topics = imm_pop_exposure.copy()\n",
    "\n",
    "# Get topic frequencies\n",
    "citizens_topics.iloc[0, :] = (\n",
    "    citizens_topics.iloc[0, :]\n",
    "    / imm_pop_df[\n",
    "        (imm_pop_df.exposed == \"Camp\") & (imm_pop_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")\n",
    "citizens_topics.iloc[1, :] = (\n",
    "    citizens_topics.iloc[1, :]\n",
    "    / imm_pop_df[\n",
    "        (imm_pop_df.exposed == \"No camp\") & (imm_pop_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")\n",
    "citizens_topics.iloc[2, :] = (\n",
    "    citizens_topics.iloc[2, :]\n",
    "    / imm_pop_df[\n",
    "        (imm_pop_df.exposed == \"RIC\") & (imm_pop_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Figure 12: Exposure intensity, citizens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get concerns\n",
    "citizens_concerns = citizens_topics.columns.tolist()\n",
    "\n",
    "# Add figure\n",
    "fig = go.Figure()\n",
    "\n",
    "# Add labels\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"No camp\",\n",
    "        x=citizens_concerns,\n",
    "        y=citizens_topics.reset_index().iloc[1, 1:],\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"blue\",\n",
    "    )\n",
    ")\n",
    "\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"Camp\",\n",
    "        x=citizens_concerns,\n",
    "        y=citizens_topics.reset_index().iloc[0, 1:],\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"pink\",\n",
    "    )\n",
    ")\n",
    "\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"RIC\",\n",
    "        x=citizens_concerns,\n",
    "        y=citizens_topics.reset_index().iloc[2, 1:],\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"red\",\n",
    "    )\n",
    ")\n",
    "\n",
    "# Change the bar mode\n",
    "fig.update_layout(\n",
    "    title_x=0.5, barmode=\"group\", template=\"plotly_white\", font=dict(size=22)\n",
    ")\n",
    "\n",
    "fig.update_layout(\n",
    "    annotations=[\n",
    "        dict(\n",
    "            text=\"Frequency of mentions\",\n",
    "            x=-0.05,  # Adjust the position as needed\n",
    "            y=1.05,  # Position above the plot (1.05 places it above the plot area)\n",
    "            xref=\"paper\",\n",
    "            yref=\"paper\",\n",
    "            showarrow=False,\n",
    "            font=dict(size=22, color=\"black\"),  # Font size and color\n",
    "            xanchor=\"left\",  # Anchor the text to the left\n",
    "            yanchor=\"bottom\",  # Anchor the text at the bottom\n",
    "        )\n",
    "    ]\n",
    ")\n",
    "\n",
    "fig.update_xaxes(tickfont=dict(size=22))\n",
    "\n",
    "fig.write_html(\n",
    "    file=\"../figures/Figure_12.html\",\n",
    ")\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read dataset\n",
    "imm_pop = pd.read_excel(imm_pop_path)\n",
    "\n",
    "# Map exposure, intensity and political orientation\n",
    "imm_pop.pol_orient = imm_pop.pol_orient.apply(\n",
    "    lambda x: \"Right-wing\" if x > 4 else \"Left-wing\"\n",
    ")\n",
    "imm_pop.exposed = imm_pop.exposed.apply(lambda x: {0: \"No camp\", 1: \"Camp\"}[x])\n",
    "imm_pop.loc[\n",
    "    imm_pop.Q26_residence_2.isin(\n",
    "        [\"Mytilinis\", \"Anatolikis Samou\", \"Lerou\", \"Chiou\", \"Ko\", \"Orestiadas\"]\n",
    "    ),\n",
    "    \"exposed\",\n",
    "] = \"RIC\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a list containing citizens grouped by their political orientation and exposure\n",
    "imm_pol_exposure = []\n",
    "\n",
    "for i in range(len(imm_pop)):\n",
    "    political_orient = imm_pop.pol_orient.iloc[i]\n",
    "    exposure = imm_pop.exposed.iloc[i]\n",
    "    outcome = political_orient + \" | \" + exposure\n",
    "    imm_pol_exposure.append(outcome)\n",
    "\n",
    "imm_pop[\"pol and exposed\"] = imm_pol_exposure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Concatenate matrices to then original dataframes\n",
    "imm_pop_df = pd.concat(\n",
    "    [imm_pop, pd.DataFrame(citizens_matrix, columns=new_cols)], axis=1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group each dataframe by exposure column\n",
    "imm_pop_exposure = imm_pop_df.groupby(\"pol and exposed\")[new_cols].sum()\n",
    "\n",
    "# Apply feature engineering by creating a column \"Total\" representing the sum of topics values\n",
    "imm_pop_exposure.loc[\"Total\"] = imm_pop_exposure.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get citizens topics\n",
    "citizens_topics = imm_pop_exposure.copy()\n",
    "\n",
    "# Get topic frequencies\n",
    "citizens_topics.iloc[0, :] = (\n",
    "    citizens_topics.iloc[0, :]\n",
    "    / imm_pop_df[\n",
    "        (imm_pop_df[\"pol and exposed\"] == \"Left-wing | Camp\")\n",
    "        & (imm_pop_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")\n",
    "citizens_topics.iloc[1, :] = (\n",
    "    citizens_topics.iloc[1, :]\n",
    "    / imm_pop_df[\n",
    "        (imm_pop_df[\"pol and exposed\"] == \"Left-wing | No camp\")\n",
    "        & (imm_pop_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")\n",
    "citizens_topics.iloc[2, :] = (\n",
    "    citizens_topics.iloc[2, :]\n",
    "    / imm_pop_df[\n",
    "        (imm_pop_df[\"pol and exposed\"] == \"Left-wing | RIC\")\n",
    "        & (imm_pop_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")\n",
    "citizens_topics.iloc[3, :] = (\n",
    "    citizens_topics.iloc[3, :]\n",
    "    / imm_pop_df[\n",
    "        (imm_pop_df[\"pol and exposed\"] == \"Right-wing | Camp\")\n",
    "        & (imm_pop_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")\n",
    "citizens_topics.iloc[4, :] = (\n",
    "    citizens_topics.iloc[4, :]\n",
    "    / imm_pop_df[\n",
    "        (imm_pop_df[\"pol and exposed\"] == \"Right-wing | No camp\")\n",
    "        & (imm_pop_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")\n",
    "citizens_topics.iloc[5, :] = (\n",
    "    citizens_topics.iloc[5, :]\n",
    "    / imm_pop_df[\n",
    "        (imm_pop_df[\"pol and exposed\"] == \"Right-wing | RIC\")\n",
    "        & (imm_pop_df.iloc[:, -9:].sum(axis=1) != 0)\n",
    "    ].shape[0]\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Figure 13: Exposure intensity by political orientation, citizens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get concerns\n",
    "citizens_concerns = citizens_topics.columns.tolist()\n",
    "\n",
    "# Add figure\n",
    "fig = go.Figure()\n",
    "\n",
    "# Add labels\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"Left-wing | No camp\",\n",
    "        x=citizens_concerns,\n",
    "        y=citizens_topics.reset_index().iloc[1, 1:],\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"yellow\",\n",
    "    )\n",
    ")\n",
    "\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"Right-wing | No camp\",\n",
    "        x=citizens_concerns,\n",
    "        y=citizens_topics.reset_index().iloc[4, 1:],\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"green\",\n",
    "    )\n",
    ")\n",
    "\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"Left-wing | Camp\",\n",
    "        x=citizens_concerns,\n",
    "        y=citizens_topics.reset_index().iloc[0, 1:],\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"pink\",\n",
    "    )\n",
    ")\n",
    "\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"Right-wing | Camp\",\n",
    "        x=citizens_concerns,\n",
    "        y=citizens_topics.reset_index().iloc[3, 1:],\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"cyan\",\n",
    "    )\n",
    ")\n",
    "\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"Left-wing | RIC\",\n",
    "        x=citizens_concerns,\n",
    "        y=citizens_topics.reset_index().iloc[2, 1:],\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"red\",\n",
    "    )\n",
    ")\n",
    "\n",
    "fig.add_trace(\n",
    "    go.Bar(\n",
    "        name=\"Right-wing | RIC\",\n",
    "        x=citizens_concerns,\n",
    "        y=citizens_topics.reset_index().iloc[5, 1:],\n",
    "        textposition=\"outside\",\n",
    "        marker_color=\"blue\",\n",
    "    )\n",
    ")\n",
    "\n",
    "# Change the bar mode\n",
    "fig.update_layout(\n",
    "    title_x=0.5, barmode=\"group\", template=\"plotly_white\", font=dict(size=22)\n",
    ")\n",
    "\n",
    "fig.update_layout(\n",
    "    annotations=[\n",
    "        dict(\n",
    "            text=\"Frequency of mentions\",\n",
    "            x=-0.05,  # Adjust the position as needed\n",
    "            y=1.05,  # Position above the plot (1.05 places it above the plot area)\n",
    "            xref=\"paper\",\n",
    "            yref=\"paper\",\n",
    "            showarrow=False,\n",
    "            font=dict(size=22, color=\"black\"),  # Font size and color\n",
    "            xanchor=\"left\",  # Anchor the text to the left\n",
    "            yanchor=\"bottom\",  # Anchor the text at the bottom\n",
    "        )\n",
    "    ]\n",
    ")\n",
    "\n",
    "fig.update_xaxes(tickfont=dict(size=22))\n",
    "\n",
    "fig.write_html(\n",
    "    file=\"../figures/Figure_13.html\",\n",
    ")\n",
    "\n",
    "fig.show()"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "hbs_env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.16"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "384px"
   },
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
